library(tidyverse)
library(knitr)
library(DT)

# Das Paket ggplot2 ist die meistgenutzte Grafikbibliothek in R. Sein modularer
# Aufbau in aesthetics, coordinates und geometries erlaubt beliebige Freiheit
# in der Gestaltung von Plots.
# Der Datensatz midwest aus dem Package ggplot2 enthält Daten einer
# Volkszählung.
# Load the midwest data set.
data(midwest)

# Convert several variables to type 'factor'. across() replaces the
# superseded mutate_at(vars(...)) form.
midwest <- midwest %>%
  mutate(across(c(county, state, inmetro, category), as.factor))

# Show a random sample of ten rows as an interactive table that scrolls
# horizontally.
midwest %>%
  sample_n(10) %>%
  DT::datatable(width = 700, options = list(scrollX = TRUE))

# Distribution of a numeric variable ----
# Histogram of the Asian population share. percasian is divided by 100 before
# scales::percent formats the axis (assumes the column is on a 0-100 scale).
midwest %>%
  ggplot(aes(x = percasian / 100)) +
  geom_histogram(bins = 50, color = 1, fill = "#d77e2d") +
  scale_x_continuous(labels = scales::percent) +
  labs(
    x = "Percent asian of tot. pop.",
    y = "Count",
    title = "Distribution of Asian percentage per district"
  )

# Comparing a distribution with the normal distribution ----
# Q-Q plot of the raw Asian population share, with a reference line.
midwest %>%
  ggplot(aes(sample = percasian)) +
  geom_qq() +
  stat_qq_line()

# The same comparison after a log10 transformation.
# NOTE(review): log10() yields -Inf for any row with percasian == 0; confirm
# whether such rows exist and should be filtered out first.
midwest %>%
  ggplot(aes(sample = log10(percasian))) +
  geom_qq() +
  stat_qq_line()

# Distribution of a numeric variable compared across a few categories ----
# Density of the Asian population share, split by metro status.
midwest %>%
  ggplot(aes(x = percasian / 100, fill = inmetro)) +
  geom_density(alpha = .5) +
  scale_x_continuous(labels = scales::percent) +
  # Pair each label with its factor level explicitly. inmetro is a 0/1
  # indicator in which 1 marks a metro county, so "0" is "No" and "1" is
  # "Yes"; unnamed labels in the order c("Yes", "No") showed them inverted.
  scale_fill_discrete(breaks = c("0", "1"), labels = c("No", "Yes")) +
  labs(
    x = "Percent asian of tot. pop.",
    # geom_density() draws a density estimate, not a count.
    y = "Density",
    title = "Distribution of Asian percentage per district",
    fill = "In metro area"
  )

# Distribution of a numeric variable across several categories ----
# College education share per state: one dot per row, overlaid with a
# translucent box plot. States are ordered by percollege via fct_reorder().
midwest %>%
  ggplot(
    aes(x = fct_reorder(state, percollege), y = percollege, fill = state)
  ) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = .6) +
  # The dot plot already shows every observation, so the box plot's own
  # outlier markers are switched off entirely instead of being drawn at size 0.
  geom_boxplot(alpha = .3, outlier.shape = NA) +
  labs(x = "State", y = "College education in percent of tot. pop.")

# Relationship between two numeric variables ----
# Scatter plot of total population against area with a linear fit; the y axis
# is on a log10 scale. The plot is built once and reused for the faceted
# version below instead of repeating the whole definition.
pop_by_area <- midwest %>%
  ggplot(aes(x = area, y = poptotal)) +
  geom_point(alpha = .5, color = "#a52dd7") +
  geom_smooth(method = "lm") +
  scale_y_log10(labels = scales::comma) +
  labs(
    x = "Area",
    y = "Total population",
    title = "Population per Area",
    subtitle = "in log scale"
  )

pop_by_area

# The same plot with one panel per state.
pop_by_area + facet_wrap(vars(state))

library(GGally)
# Pairwise plots of four percentage variables, colored by metro status.
midwest %>%
  select(percollege, percbelowpoverty, percblack, percasian, inmetro) %>%
  GGally::ggpairs(mapping = aes(color = inmetro))

# Number of rows per state, stacked by metro status.
midwest %>%
  ggplot(aes(state, fill = inmetro)) +
  geom_bar()

# The same counts with the metro groups side by side.
midwest %>%
  ggplot(aes(state, fill = inmetro)) +
  geom_bar(position = "dodge")

# Total population per state. geom_col() stacks the poptotal value of every
# row; the previous geom_bar(stat = "sum") went through stat_sum(), which
# collapses rows that share the same x, y and group before stacking.
midwest %>%
  ggplot(aes(state, poptotal, fill = inmetro)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma)

# Share of metro and non-metro rows within each state.
midwest %>%
  ggplot(aes(state, fill = inmetro)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent)

# Build a Time series data set
# 365 consecutive dates ending today, newest first.
day <- Sys.Date() - 0:364
# Uniform noise plus a quadratic trend; seq(-140, 224) has 365 elements, one
# per date.
value <- runif(365) + seq(-140, 224)^2 / 10000
tsdata <- tibble(day, value)

# Line chart of the series with an x-axis break every three months.
p <- tsdata %>%
  ggplot(aes(day, value)) +
  geom_line() +
  scale_x_date(
    #date_labels = '%Y-%m',
    date_breaks = "3 months"
  )
p

# Interactivity with ggplotly ----
library(plotly)

# Pass the ggplot object stored in p to ggplotly().
ggplotly(p)

# Interactivity with dygraphs ----
library(dygraphs)

# Move the dates into the row names of a plain data frame, then convert the
# result to an xts object.
xtsdata <- xts::as.xts(
  column_to_rownames(as.data.frame(tsdata), "day")
)

# Pass the xts object to dygraph() and add a range selector.
xtsdata %>%
  dygraph() %>%
  dyRangeSelector()